load("het_data.rdata")

## commented out for replication files 

#  install.packages("xtable")
#  install.packages("ggplot2")
#  install.packages("rmeta")
#  install.packages("dplyr")
#  install.packages("rdrobust")

library(rdrobust)
library(xtable)
library(ggplot2)
library(dplyr)
library(rmeta)


## Estimate  models using data split on year and covariates
mean_eff <- array(NA, dim = c(14, 2) )
mean_se  <- array(NA, dim = c(14, 2) )
mean_eff_prob <- array(NA, dim = c(14, 2) )
upp_prob  <- array(NA, dim = c(14, 2) )
low_prob  <- array(NA, dim = c(14, 2) )
years <- c(2009, 2013, 2014, 2015)

colnames(mean_eff) <- c("FALSE", "TRUE")

rownames(mean_se)  <- rownames(mean_eff)
colnames(mean_se)  <- colnames(mean_eff)

rownames(mean_eff_prob)  <- rownames(mean_eff)
colnames(mean_eff_prob)  <- colnames(mean_eff)
rownames(upp_prob)  <- rownames(mean_eff)
colnames(upp_prob)  <- colnames(mean_eff)
rownames(low_prob)  <- rownames(mean_eff)
colnames(low_prob)  <- colnames(mean_eff)

for (i in 1:5){
  for (k in 1:2){
    colnames(data_list[[i]])[3] <- "var" 
    
    data <- 
      data.frame(data_list[[i]]) %>%
      mutate(running = .[, 1]) %>% 
      filter(var == k - 1) %>% 
      mutate(treated = running > 0) 
    
    data_close <- 
      data %>%
      ungroup() %>%
      mutate(voted     = round(par_vote*n),
             abstained = round((1-par_vote)*n)) %>% 
      select(year, running, treated, voted, abstained)
    
    data_voted <- 
      as.data.frame(lapply(data_close, 
                           function(x,p) rep(x,p), 
                           data_close[["voted"]])) %>%
      mutate(voted = 1)
    
    data_abstained <- 
      as.data.frame(lapply(data_close, 
                           function(x,p) rep(x,p), 
                           data_close[["abstained"]])) %>%
      mutate(voted = 0)
    
    data_master <- 
      rbind(data_voted, data_abstained)
    
    coef     <- rep(NA, 4)
    coef_rob <- rep(NA, 4)
    se       <- rep(NA, 4)
    se_rob   <- rep(NA, 4)
    
    for(j in 1:4){
      sub_data <- 
        data_master %>%
        filter(year == years[j])
      
      mod <- 
        with(sub_data, rdrobust(y = voted, x = running))
      
      coef[j]     <- mod$coef[1]
      coef_rob[j] <- mod$coef[3]
      se[j]       <- mod$se[1]
      se_rob[j]   <- mod$se[3]
    }
    
    # Pool results using fixed effects metaanalysis
    
    mean_vote <- with(subset(data_master, running < 0), mean(voted))
    
    mean_eff[i,k] <- meta.summaries(coef, se)$summary
    mean_se[i,k]  <- meta.summaries(coef, se)$se.summary
    mean_eff_prob[i,k] <- 
      qnorm(mean_eff[i, k] + mean_vote) - 
      qnorm(mean_vote)
    upp_prob[i,k] <- 
      qnorm(mean_eff[i, k] + mean_vote + qnorm(0.975)*mean_se[i, k]) - 
      qnorm(mean_vote)
    low_prob[i,k] <- 
      qnorm(mean_eff[i, k] + mean_vote + qnorm(0.025)*mean_se[i, k]) - 
      qnorm(mean_vote)  
    
    mean_eff[i + 7,k] <- meta.summaries(coef_rob, se_rob)$summary
    mean_se[i + 7,k]  <- meta.summaries(coef_rob, se_rob)$se.summary
    mean_eff_prob[i + 7,k] <- 
      qnorm(mean_eff[i + 7,k] + mean_vote) - 
      qnorm(mean_vote)
    upp_prob[i + 7,k] <- 
      qnorm(mean_eff[i + 7,k] + mean_vote + qnorm(0.975)*mean_se[i + 7,k]) - 
      qnorm(mean_vote)
    low_prob[i + 7,k] <- 
      qnorm(mean_eff[i + 7,k] + mean_vote + qnorm(0.025)*mean_se[i + 7,k]) - 
      qnorm(mean_vote)  
    
    
    print(c(i, k)) 
  }
}

for (i in 6:7){
  for (k in 1:2){
    data <- 
      data.frame(data_list[[6]]) %>%
      mutate(running = .[, 1]) %>% 
      filter(par_female == i - 6) %>%
      mutate(treated = running > 0)
    
    if(k == 1) data <- subset(data, child_female == "Mand")
    if(k == 2) data <- subset(data, child_female == "Kvinde")
    
    data_close <- 
      data %>%
      ungroup() %>%
      mutate(voted     = round(par_vote*n),
             abstained = round((1-par_vote)*n)) %>% 
      select(year, running, treated, voted, abstained)
    
    data_voted <- 
      as.data.frame(lapply(data_close, 
                           function(x,p) rep(x,p), 
                           data_close[["voted"]])) %>%
      mutate(voted = 1)
    
    data_abstained <- 
      as.data.frame(lapply(data_close, 
                           function(x,p) rep(x,p), 
                           data_close[["abstained"]])) %>%
      mutate(voted = 0)
    
    data_master <- 
      rbind(data_voted, data_abstained)
    
    for(j in 1:4){
      sub_data <- 
        data_master %>%
        filter(year == years[j])
      
      mod <- 
        with(sub_data, rdrobust(y = voted, x = running))
      
      coef[j]     <- mod$coef[1]
      coef_rob[j] <- mod$coef[3]
      se[j]       <- mod$se[1]
      se_rob[j]   <- mod$se[3]
    }
    
    # Pool results using fixed effects metaanalysis
    
    mean_vote <- with(subset(data_master, running < 0), mean(voted))
    
    mean_eff[i,k] <- meta.summaries(coef, se)$summary
    mean_se[i,k]  <- meta.summaries(coef, se)$se.summary
    mean_eff_prob[i,k] <- 
      qnorm(mean_eff[i, k] + mean_vote) - 
      qnorm(mean_vote)
    upp_prob[i,k] <- 
      qnorm(mean_eff[i, k] + mean_vote + qnorm(0.975)*mean_se[i, k]) - 
      qnorm(mean_vote)
    low_prob[i,k] <- 
      qnorm(mean_eff[i, k] + mean_vote + qnorm(0.025)*mean_se[i, k]) - 
      qnorm(mean_vote)  
    
    mean_eff[i + 7,k] <- meta.summaries(coef_rob, se_rob)$summary
    mean_se[i + 7,k]  <- meta.summaries(coef_rob, se_rob)$se.summary
    mean_eff_prob[i + 7,k] <- 
      qnorm(mean_eff[i + 7,k] + mean_vote) - 
      qnorm(mean_vote)
    upp_prob[i + 7,k] <- 
      qnorm(mean_eff[i + 7,k] + mean_vote + qnorm(0.975)*mean_se[i + 7,k]) - 
      qnorm(mean_vote)
    low_prob[i + 7,k] <- 
      qnorm(mean_eff[i + 7,k] + mean_vote + qnorm(0.025)*mean_se[i + 7,k]) - 
      qnorm(mean_vote)  
  }
  print(i)
}

# Plotting ----------------------------------------------------------------

mean_eff <- mean_eff * 100
mean_se  <- mean_se  * 100

effs <- mean_eff[1,]
low  <- mean_eff[1,] + mean_se[1,] * qnorm(0.025) 
upp  <- mean_eff[1,] + mean_se[1,] * qnorm(0.975) 


for (i in 2:14){
  effs <- c(effs, mean_eff[i,])
  low  <- c(low, mean_eff[i,] + mean_se[i,] * qnorm(0.025) ) 
  upp  <- c(upp, mean_eff[i,] + mean_se[i,] * qnorm(0.975) )
  
}


for (i in 1:14){
  effs <- c(effs, mean_eff_prob[i,])
  low  <- c(low , low_prob[i,])
  upp  <- c(upp , upp_prob[i,])
}

labels <- rep(c("Age below median (parent)",
                "Age above median (parent)",
                "Income below median (parent)",
                "Income above median (parent)",
                "No degree (parent)",
                "Has degree (parent)",
                "Non-native (parent)",
                "Native (parent)",
                "Child not oldest",
                "Child oldest",
                "Father x Son",
                "Father x Daughter",
                "Mother x Son",
                "Mother x Daughter"))

place  <- rep(rev(c(1, 2, 3, 4, 6, 7, 9, 10, 12, 13, 15, 16, 18, 19)), 4)
#parent <- rep(c("Parent does have \nhigh school", "Parent does not \nhave high school"), 8)
type   <- rep(rep(c("Conventional RD", "Robust RD"), each = 14), 2)
prob   <- rep(c("Effect size in percentage points", "Effect size in probits"), each = 28)


plot_data <- 
  data_frame(effs, upp, low, place, type, prob)


plot <- 
  ggplot(data = plot_data,
         aes(y = place,
             x = effs,
             xmin = low,
             xmax = upp)) +
  geom_errorbarh(height = 0) +
  facet_grid(type ~ prob, scales = "free_x") +
  scale_y_continuous(breaks = place[1:14], labels = labels) +
  theme_classic() + 
  theme(axis.text = element_text(size = 12),
        axis.title = element_text(size = 20),
        legend.text = element_text(size = 16),
        legend.title= element_blank(), 
        legend.position = "top",
        panel.spacing = unit(2, "lines")) +
  ylab("") + 
  xlab("") +
  geom_point(size = 3) +
  scale_alpha_discrete(range = c(0.5, 1)) +
  geom_vline(xintercept = 0, linetype = 3, alpha = 0.5)

# Is the pooled difference in percentage points greater among parents of daughters? 
# sample from distributions of parents and compare differences
set.seed(131004)
samp_size <- 10000000

mot_dau <- rnorm(samp_size, mean_eff[7,2], mean_se[7,2])
fat_dau <- rnorm(samp_size, mean_eff[6,2], mean_se[6,2])
mot_son <- rnorm(samp_size, mean_eff[7,1], mean_se[7,1])
fat_son <- rnorm(samp_size, mean_eff[6,1], mean_se[6,1])

dif <- (mot_dau + fat_dau - (mot_son + fat_son))/2
mean(dif)
quantile(dif, prob = c(0.025, 0.5, 0.975))

# Create table with baseline turnout rates. 
# That is table with turnout and group size for only those not exposed 
tab <- matrix(NA, nrow = 28, ncol = 5)

for (i in 1:5){
  for (k in 1:2){
    data <-
      data.frame(data_list[[i]]) %>%
      mutate(running = .[, 1]) %>% 
      filter(var == k - 1 & running < 0)
    turnout <- round(with(data, weighted.mean(par_vote, n)) * 100, 2)
    n       <- sum(data$n)    
    #tab[2*i - 2 + k, 5] <- paste(turnout, " (", n, ")", sep = "")
    tab[4*i - 5 + 2*k,5] <- paste(turnout)
    tab[4*i - 4 + 2*k,5] <- paste("(", n, ")", sep = "")
    for(j in 1:4){
      turnout <- round(with(subset(data, year == years[j]), weighted.mean(par_vote, n)) * 100, 2)
      n       <- with(subset(data, year == years[j]), sum(n))
      #tab[2*i - 2 + k, j] <- paste(turnout, " (", n, ")", sep = "")
      tab[4*i - 5 + 2*k,j] <- paste(turnout)
      tab[4*i - 4 + 2*k,j] <- paste("(", n, ")", sep = "")
      
    }
  }
}

for (i in 6:7){
  for (k in 1:2){
    data <- 
      data.frame(data_list[[6]]) %>%
      filter(par_female == i - 6)
    if(k == 1) data <- subset(data, child_female == "Mand")
    if(k == 2) data <- subset(data, child_female == "Kvinde")
    turnout <- round(with(data, weighted.mean(par_vote, n)) * 100, 2)
    n       <- sum(data$n)    
    #    tab[2*i - 2 + k, 5] <- paste(turnout, " (", n, ")", sep = "")
    tab[4*i - 5 + 2*k,5] <- paste(turnout)
    tab[4*i - 4 + 2*k,5] <- paste("(", n, ")", sep = "")
    for(j in 1:4){
      turnout <- round(with(subset(data, year == years[j]), weighted.mean(par_vote, n)) * 100, 2)
      n       <- with(subset(data, year == years[j]), sum(n))
      #      tab[2*i - 2 + k, j] <- paste(turnout, " (", n, ")", sep = "")
      tab[4*i - 5 + 2*k,j] <- paste(turnout)
      tab[4*i - 4 + 2*k,j] <- paste("(", n, ")", sep = "")
      
    }
  }
}

colnames(tab) <- c(2009, 2013, 2014, 2015, "Pooled")
new_lab <- rep(NA, 24) 
for (i in 1:14){
  new_lab[2*i - 1] <- labels[i]
  new_lab[2*i]     <- i
}
rownames(tab) <- new_lab
xtable(tab)


